"""
@name : 词频统计
@author : qt
@projectname: Study
@time: 2022/3/5
"""

def _normalize_line(line):
    """Return *line* lower-cased, with every non-letter replaced by a space.

    "Letter" means any character for which ``str.isalpha()`` is true.
    Punctuation, digits, tabs and newlines all become single spaces so that
    the result can be tokenized with a plain whitespace split.
    """
    return ''.join(ch if ch.isalpha() else ' ' for ch in line).lower()


def count_word_frequency(lines):
    """Count how often each word occurs in an iterable of text lines.

    A word is a maximal run of alphabetic characters; matching is
    case-insensitive because every line is lower-cased before counting.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A dict mapping each lower-cased word to its number of occurrences,
        in order of first appearance. The empty string is never a key.
    """
    word_counts = {}
    for line in lines:
        # Every line is processed, including one consisting only of letters
        # (e.g. a final line with no trailing newline).
        # split() without arguments drops the empty tokens that runs of
        # spaces would otherwise produce.
        for word in _normalize_line(line).split():
            word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts


if __name__ == '__main__':
    # Iterating the file object streams it line by line, so the whole text
    # never has to be held in memory at once.
    with open('Walden', 'r') as fp:
        word_rate_dict = count_word_frequency(fp)
    # Print the final table once, after the whole file has been counted.
    print(word_rate_dict)

